Last execution time: 07/04/2025 05:42:46
Get data
Products type filter
explore_types = ['frutas', 'lacteos', 'verduras', 'embutidos', 'panaderia', 'desayuno', 'congelados', 'abarrotes',
'aves', 'carnes', 'pescados']Data table
path = Path('../../output')
csv_files = L(path.glob('*.csv')).filter(lambda o: os.stat(o).st_size>0)
pat_store = re.compile('(.+)\_\d+')
pat_date = re.compile('.+\_(\d+)')
df = (
pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
for o in csv_files], ignore_index=True)
.pipe(lambda d: d.assign(
name=d.name.str.lower()+' ('+d.store+')',
sku=d.id.where(d.sku.isna(), d.sku).astype(int),
date=pd.to_datetime(d.date)
))
.drop('id', axis=1)
.loc[lambda d: d.category.str.contains('|'.join(explore_types))]
# Filter products with recent data
# .loc[lambda d: d.name.isin(d.groupby('name').date.max().loc[ge(datetime.now()-timedelta(days=30))].index)]
# Filter empty prices
.loc[lambda d: d.price>0]
)
print(df.shape)
df.sample(3)(1331183, 8)
| | sku | name | brand | category | uri | price | store | date |
|---|---|---|---|---|---|---|---|---|
| 792638 | 11069381 | pasta de ajo walibi doypack 85g (plaza_vea) | WALIBI | https://www.plazavea.com.pe/abarrotes | https://www.plazavea.com.pe/pasta-de-ajo-walib... | 3.29 | plaza_vea | 2024-01-15 |
| 1175665 | 10588582 | chuletón de pierna redondos duroc x kg (plaza_... | REDONDOS | https://www.plazavea.com.pe/carnes-aves-y-pesc... | https://www.plazavea.com.pe/chuleton-de-pierna... | 30.90 | plaza_vea | 2024-08-26 |
| 1853605 | 10215193 | galletas rellenitas de fresa paquete 90g (plaz... | GN | https://www.plazavea.com.pe/abarrotes | https://www.plazavea.com.pe/galletas-rellenita... | 2.50 | plaza_vea | 2025-03-24 |
Top changes (ratio)
Code
top_changes = (df
# Use last 30 days of data to compare prices
.loc[lambda d: d.date>=(datetime.now()-timedelta(days=30))]
.sort_values('date')
# Get percentage change
.assign(change=lambda d: d
.groupby(['store','sku'], as_index=False)
.price.transform(lambda d: (d-d.shift())/d.shift())
)
.groupby(['store','sku'], as_index=False)
.agg({'price':'last', 'change':'mean', 'date':'last'})
.rename({'price':'last_price', 'date':'last_date'}, axis=1)
.dropna()
.loc[lambda d: d.last_date==d.last_date.max()]
.loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes.head(3)| store | sku | last_price | change | last_date | |
|---|---|---|---|---|---|
| 536 | plaza_vea | 2574 | 7.9 | 0.919435 | 2025-04-07 |
| 591 | plaza_vea | 2919 | 4.2 | -0.330215 | 2025-04-07 |
| 643 | plaza_vea | 3083 | 6.2 | -0.247276 | 2025-04-07 |
Code
def plot_changes(df_changes, title):
    """Draw the price history of the products listed in `df_changes`.

    `df_changes` is merged with the module-level `df` on `store` and `sku`
    after its `change` column is dropped, so it must provide those three
    columns. The tooltip also shows `last_price`, which must be present in
    the merged frame.

    Returns an interactive Altair line chart with one line per product name;
    clicking a legend entry filters the chart to that product.
    """
    legend_pick = alt.selection_point(fields=['name'], bind='legend')
    history = df_changes.drop(columns='change').merge(df, on=['store', 'sku'])
    # Fix the colour domain to the alphabetically sorted product names
    name_domain = sorted(history.name.unique().tolist())
    chart = (
        alt.Chart(history)
        .mark_line(point=True)
        .encode(
            x='date',
            y='price',
            color=alt.Color('name').scale(domain=name_domain),
            tooltip=['name', 'price', 'last_price'],
        )
        .add_params(legend_pick)
        .transform_filter(legend_pick)
        .interactive()
        .properties(width=650, title=title)
        .configure_legend(orient='top', columns=3)
    )
    return chart
Code
# Price history of the ten products with the largest absolute mean change
plot_changes(top_changes.head(10), 'Top changes')
Code
# Ascending sort puts the most negative changes first
plot_changes(top_changes.sort_values('change').head(10), 'Top drops')
Code
(top_changes
.sort_values('change')
.tail(10)
.pipe(plot_changes, 'Top increases')
)Top changes (absolute values)
Code
top_changes_abs = (df
# Use last 30 days of data to compare prices
.loc[lambda d: d.date>=(datetime.now()-timedelta(days=30))]
.sort_values('date')
# Get percentage change
.assign(change=lambda d: d
.groupby(['store','sku'], as_index=False)
.price.transform(lambda d: (d-d.shift()).iloc[-1])
)
.groupby(['store','sku'], as_index=False)
.agg({'price':'last', 'change':'mean', 'date':'last'})
.rename({'price':'last_price', 'date':'last_date'}, axis=1)
.dropna()
.loc[lambda d: d.last_date==d.last_date.max()]
.loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes_abs.head(3)| store | sku | last_price | change | last_date | |
|---|---|---|---|---|---|
| 2576 | plaza_vea | 10037893 | 251.0 | 45.4 | 2025-04-07 |
| 2350 | plaza_vea | 10012680 | 157.5 | 44.7 | 2025-04-07 |
| 5766 | plaza_vea | 11737880 | 99.9 | 21.9 | 2025-04-07 |
Code
# Price history of the ten products with the largest absolute price difference
plot_changes(top_changes_abs.head(10), 'Top changes')
Code
# Ascending sort puts the most negative differences first
plot_changes(top_changes_abs.sort_values('change').head(10), 'Top drops')
Code
(top_changes_abs
.sort_values('change')
.tail(10)
.pipe(plot_changes, 'Top increases')
)Search specific products
Code
# Price history of the products whose name is in `names`.
# NOTE(review): `names` is not defined in the visible code — presumably set in
# a hidden cell; confirm.
(
    alt.Chart(df.loc[df.name.isin(names)])
    .mark_line(point=True)
    .encode(x='date', y='price', color='name', tooltip=['name', 'price'])
    .properties(width=650, title='Pollo')
    .interactive()
    .configure_legend(orient='top', columns=3)
)
Code
# Price history of the products whose name is in `names`.
# NOTE(review): `names` is not defined in the visible code — presumably set in
# a hidden cell; confirm.
(
    alt.Chart(df.loc[df.name.isin(names)])
    .mark_line(point=True)
    .encode(x='date', y='price', color='name', tooltip=['name', 'price'])
    .properties(width=650, title='Palta')
    .interactive()
    .configure_legend(orient='top', columns=3)
)
Code
# Price history of the products whose name is in `names`.
# NOTE(review): `names` is not defined in the visible code — presumably set in
# a hidden cell; confirm.
(
    alt.Chart(df.loc[df.name.isin(names)])
    .mark_line(point=True)
    .encode(x='date', y='price', color='name', tooltip=['name', 'price'])
    .properties(width=650, title='Aceite')
    .interactive()
    .configure_legend(orient='top', columns=3)
)
Code
# Price history of the products whose name is in `names`.
# NOTE(review): `names` is not defined in the visible code — presumably set in
# a hidden cell; confirm.
# NOTE(review): the title 'Aceite' is also used by other charts in this file —
# confirm it matches the products selected here.
(
    alt.Chart(df.loc[df.name.isin(names)])
    .mark_line(point=True)
    .encode(x='date', y='price', color='name', tooltip=['name', 'price'])
    .properties(width=650, title='Aceite')
    .interactive()
    .configure_legend(orient='top', columns=3)
)
Code
# Price history of the products whose name is in `names`.
# NOTE(review): `names` is not defined in the visible code — presumably set in
# a hidden cell; confirm.
# NOTE(review): the title 'Aceite' is also used by other charts in this file —
# confirm it matches the products selected here.
(
    alt.Chart(df.loc[df.name.isin(names)])
    .mark_line(point=True)
    .encode(x='date', y='price', color='name', tooltip=['name', 'price'])
    .properties(width=650, title='Aceite')
    .interactive()
    .configure_legend(orient='top', columns=3)
)